From ed5cefb037c2ba5b632d7dea5b8b004af53b51a9 Mon Sep 17 00:00:00 2001 From: Vincent Li Date: Sun, 7 Dec 2025 07:53:07 -0800 Subject: [PATCH] loongarch64: backport kernel BPF trampoline Enable xdp-loader to attach multiple XDP programs to a single interface by backporting the BPF trampoline implementation from Linux kernel 6.17 to 6.12 for LoongArch64. The xdp-loader utility relies on libxdp, which in turn requires kernel support for BPF trampoline. While x86_64 and other architectures have this feature, LoongArch64 only gained it in kernel 6.17. Without this backport, xdp-loader fails on LoongArch64 systems running kernel 6.12. Changes backported include: - BPF trampoline infrastructure for LoongArch64 - Necessary JIT compiler updates - Related BPF subsystem changes This allows full compatibility with the xdp-tools ecosystem on LoongArch64 systems running older kernel versions. Reference: https://github.com/xdp-project/xdp-tools/tree/main/lib/libxdp Signed-off-by: Vincent Li Link: https://github.com/openwrt/openwrt/pull/21077 Signed-off-by: Christian Marangi --- ...h-Add-larch_insn_gen_beq_bne-helpers.patch | 68 +++ ...rename-validate_code-to-validate_ctx.patch | 52 ++ ...dd-dynamic-code-modification-support.patch | 240 +++++++++ ...BPF-Add-basic-bpf-trampoline-support.patch | 467 ++++++++++++++++++ 4 files changed, 827 insertions(+) create mode 100644 target/linux/loongarch64/patches-6.12/001-v6.17-LoongArch-Add-larch_insn_gen_beq_bne-helpers.patch create mode 100644 target/linux/loongarch64/patches-6.12/002-v6.17-LoongArch-BPF-Update-the-code-to-rename-validate_code-to-validate_ctx.patch create mode 100644 target/linux/loongarch64/patches-6.12/003-v6.17-loongArch-BPF-Add-dynamic-code-modification-support.patch create mode 100644 target/linux/loongarch64/patches-6.12/004-v6.17-LoongArch-BPF-Add-basic-bpf-trampoline-support.patch diff --git a/target/linux/loongarch64/patches-6.12/001-v6.17-LoongArch-Add-larch_insn_gen_beq_bne-helpers.patch b/target/linux/loongarch64/patches-6.12/001-v6.17-LoongArch-Add-larch_insn_gen_beq_bne-helpers.patch new file mode 100644 index 0000000000..971ef78034 --- /dev/null +++ b/target/linux/loongarch64/patches-6.12/001-v6.17-LoongArch-Add-larch_insn_gen_beq_bne-helpers.patch @@ -0,0 +1,68 @@ +From 6ab55e0a9eac638ca390bfaef6408c10c127e623 Mon Sep 17 00:00:00 2001 +From: Chenghao Duan +Date: Sun, 3 Aug 2025 22:49:50 +0800 +Subject: [PATCH 01/12] LoongArch: Add larch_insn_gen_{beq,bne} helpers + +Add larch_insn_gen_beq() and larch_insn_gen_bne() helpers which will be +used in BPF trampoline implementation. 
+ +Reviewed-by: Hengqi Chen +Co-developed-by: George Guo +Signed-off-by: George Guo +Co-developed-by: Youling Tang +Signed-off-by: Youling Tang +Signed-off-by: Chenghao Duan +Signed-off-by: Huacai Chen +--- + arch/loongarch/include/asm/inst.h | 2 ++ + arch/loongarch/kernel/inst.c | 28 ++++++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +--- a/arch/loongarch/include/asm/inst.h ++++ b/arch/loongarch/include/asm/inst.h +@@ -515,6 +515,8 @@ u32 larch_insn_gen_move(enum loongarch_g + u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm); + u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); + u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); ++u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); ++u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); + u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); + + static inline bool signed_imm_check(long val, unsigned int bit) +--- a/arch/loongarch/kernel/inst.c ++++ b/arch/loongarch/kernel/inst.c +@@ -335,6 +335,34 @@ u32 larch_insn_gen_lu52id(enum loongarch + return insn.word; + } + ++u32 larch_insn_gen_beq(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) ++{ ++ union loongarch_instruction insn; ++ ++ if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) { ++ pr_warn("The generated beq instruction is out of range.\n"); ++ return INSN_BREAK; ++ } ++ ++ emit_beq(&insn, rj, rd, imm >> 2); ++ ++ return insn.word; ++} ++ ++u32 larch_insn_gen_bne(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) ++{ ++ union loongarch_instruction insn; ++ ++ if ((imm & 3) || imm < -SZ_128K || imm >= SZ_128K) { ++ pr_warn("The generated bne instruction is out of range.\n"); ++ return INSN_BREAK; ++ } ++ ++ emit_bne(&insn, rj, rd, imm >> 2); ++ ++ return insn.word; ++} ++ + u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) + { + union loongarch_instruction insn; diff --git a/target/linux/loongarch64/patches-6.12/002-v6.17-LoongArch-BPF-Update-the-code-to-rename-validate_code-to-validate_ctx.patch b/target/linux/loongarch64/patches-6.12/002-v6.17-LoongArch-BPF-Update-the-code-to-rename-validate_code-to-validate_ctx.patch new file mode 100644 index 0000000000..5ef449561b --- /dev/null +++ b/target/linux/loongarch64/patches-6.12/002-v6.17-LoongArch-BPF-Update-the-code-to-rename-validate_code-to-validate_ctx.patch @@ -0,0 +1,52 @@ +From ed1a1fe6ec5e73b23b310b434ace07d1e5060657 Mon Sep 17 00:00:00 2001 +From: Chenghao Duan +Date: Tue, 5 Aug 2025 19:00:18 +0800 +Subject: [PATCH 02/12] LoongArch: BPF: Rename and refactor validate_code() + +1. Rename the existing validate_code() to validate_ctx() +2. Factor out the code validation handling into a new helper + validate_code() + +Then: + +* validate_code() is used to check the validity of code. +* validate_ctx() is used to check both code validity and table entry + correctness. + +The new validate_code() will be used in subsequent changes. 
+ +Reviewed-by: Hengqi Chen +Co-developed-by: George Guo +Signed-off-by: George Guo +Signed-off-by: Chenghao Duan +Signed-off-by: Huacai Chen +--- + arch/loongarch/net/bpf_jit.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/arch/loongarch/net/bpf_jit.c ++++ b/arch/loongarch/net/bpf_jit.c +@@ -1170,6 +1170,14 @@ static int validate_code(struct jit_ctx + return -1; + } + ++ return 0; ++} ++ ++static int validate_ctx(struct jit_ctx *ctx) ++{ ++ if (validate_code(ctx)) ++ return -1; ++ + if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries)) + return -1; + +@@ -1278,7 +1286,7 @@ skip_init_ctx: + build_epilogue(&ctx); + + /* 3. Extra pass to validate JITed code */ +- if (validate_code(&ctx)) { ++ if (validate_ctx(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_offset; diff --git a/target/linux/loongarch64/patches-6.12/003-v6.17-loongArch-BPF-Add-dynamic-code-modification-support.patch b/target/linux/loongarch64/patches-6.12/003-v6.17-loongArch-BPF-Add-dynamic-code-modification-support.patch new file mode 100644 index 0000000000..e011b2b31b --- /dev/null +++ b/target/linux/loongarch64/patches-6.12/003-v6.17-loongArch-BPF-Add-dynamic-code-modification-support.patch @@ -0,0 +1,240 @@ +From 9fbd18cf4c69f512f7de3ab73235078f3e32ecec Mon Sep 17 00:00:00 2001 +From: Chenghao Duan +Date: Tue, 5 Aug 2025 19:00:18 +0800 +Subject: [PATCH 03/12] LoongArch: BPF: Add dynamic code modification support + +This commit adds support for BPF dynamic code modification on the +LoongArch architecture: +1. Add bpf_arch_text_copy() for instruction block copying. +2. Add bpf_arch_text_poke() for runtime instruction patching. +3. Add bpf_arch_text_invalidate() for code invalidation. + +On LoongArch, since symbol addresses in the direct mapping region can't +be reached via relative jump instructions from the paged mapping region, +we use the move_imm+jirl instruction pair as absolute jump instructions. +These require 2-5 instructions, so we reserve 5 NOP instructions in the +program as placeholders for function jumps. + +The larch_insn_text_copy() function is solely used for BPF. And the use +of larch_insn_text_copy() requires PAGE_SIZE alignment. Currently, only +the size of the BPF trampoline is page-aligned. 
+ +Co-developed-by: George Guo +Signed-off-by: George Guo +Signed-off-by: Chenghao Duan +Signed-off-by: Huacai Chen +--- + arch/loongarch/include/asm/inst.h | 1 + + arch/loongarch/kernel/inst.c | 46 +++++++++++++ + arch/loongarch/net/bpf_jit.c | 105 +++++++++++++++++++++++++++++- + 3 files changed, 151 insertions(+), 1 deletion(-) + +--- a/arch/loongarch/include/asm/inst.h ++++ b/arch/loongarch/include/asm/inst.h +@@ -502,6 +502,7 @@ void arch_simulate_insn(union loongarch_ + int larch_insn_read(void *addr, u32 *insnp); + int larch_insn_write(void *addr, u32 insn); + int larch_insn_patch_text(void *addr, u32 insn); ++int larch_insn_text_copy(void *dst, void *src, size_t len); + + u32 larch_insn_gen_nop(void); + u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); +--- a/arch/loongarch/kernel/inst.c ++++ b/arch/loongarch/kernel/inst.c +@@ -4,6 +4,8 @@ + */ + #include + #include ++#include ++#include + + #include + #include +@@ -229,6 +231,50 @@ int larch_insn_patch_text(void *addr, u3 + + return ret; + } ++ ++struct insn_copy { ++ void *dst; ++ void *src; ++ size_t len; ++ unsigned int cpu; ++}; ++ ++static int text_copy_cb(void *data) ++{ ++ int ret = 0; ++ struct insn_copy *copy = data; ++ ++ if (smp_processor_id() == copy->cpu) { ++ ret = copy_to_kernel_nofault(copy->dst, copy->src, copy->len); ++ if (ret) ++ pr_err("%s: operation failed\n", __func__); ++ } ++ ++ flush_icache_range((unsigned long)copy->dst, (unsigned long)copy->dst + copy->len); ++ ++ return ret; ++} ++ ++int larch_insn_text_copy(void *dst, void *src, size_t len) ++{ ++ int ret = 0; ++ size_t start, end; ++ struct insn_copy copy = { ++ .dst = dst, ++ .src = src, ++ .len = len, ++ .cpu = smp_processor_id(), ++ }; ++ ++ start = round_down((size_t)dst, PAGE_SIZE); ++ end = round_up((size_t)dst + len, PAGE_SIZE); ++ ++ set_memory_rw(start, (end - start) / PAGE_SIZE); ++ ret = stop_machine(text_copy_cb, ©, cpu_online_mask); ++ set_memory_rox(start, (end - start) / PAGE_SIZE); ++ ++ return ret; ++} + + u32 larch_insn_gen_nop(void) + { +--- a/arch/loongarch/net/bpf_jit.c ++++ b/arch/loongarch/net/bpf_jit.c +@@ -4,8 +4,12 @@ + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ ++#include + #include "bpf_jit.h" + ++#define LOONGARCH_LONG_JUMP_NINSNS 5 ++#define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) ++ + #define REG_TCC LOONGARCH_GPR_A6 + #define TCC_SAVED LOONGARCH_GPR_S5 + +@@ -88,7 +92,7 @@ static u8 tail_call_reg(struct jit_ctx * + */ + static void build_prologue(struct jit_ctx *ctx) + { +- int stack_adjust = 0, store_offset, bpf_stack_adjust; ++ int i, stack_adjust = 0, store_offset, bpf_stack_adjust; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + +@@ -98,6 +102,10 @@ static void build_prologue(struct jit_ct + stack_adjust = round_up(stack_adjust, 16); + stack_adjust += bpf_stack_adjust; + ++ /* Reserve space for the move_imm + jirl instruction */ ++ for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) ++ emit_insn(ctx, nop); ++ + /* + * First instruction initializes the tail call count (TCC). 
+ * On tail call we skip this instruction, and the TCC is +@@ -1184,6 +1192,101 @@ static int validate_ctx(struct jit_ctx * + return 0; + } + ++static int emit_jump_and_link(struct jit_ctx *ctx, u8 rd, u64 target) ++{ ++ if (!target) { ++ pr_err("bpf_jit: jump target address is error\n"); ++ return -EFAULT; ++ } ++ ++ move_imm(ctx, LOONGARCH_GPR_T1, target, false); ++ emit_insn(ctx, jirl, rd, LOONGARCH_GPR_T1, 0); ++ ++ return 0; ++} ++ ++static int emit_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) ++{ ++ int i; ++ struct jit_ctx ctx; ++ ++ ctx.idx = 0; ++ ctx.image = (union loongarch_instruction *)insns; ++ ++ if (!target) { ++ for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) ++ emit_insn((&ctx), nop); ++ return 0; ++ } ++ ++ return emit_jump_and_link(&ctx, is_call ? LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target); ++} ++ ++void *bpf_arch_text_copy(void *dst, void *src, size_t len) ++{ ++ int ret; ++ ++ mutex_lock(&text_mutex); ++ ret = larch_insn_text_copy(dst, src, len); ++ mutex_unlock(&text_mutex); ++ ++ return ret ? ERR_PTR(-EINVAL) : dst; ++} ++ ++int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, ++ void *old_addr, void *new_addr) ++{ ++ int ret; ++ bool is_call = (poke_type == BPF_MOD_CALL); ++ u32 old_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; ++ u32 new_insns[LOONGARCH_LONG_JUMP_NINSNS] = {[0 ... 4] = INSN_NOP}; ++ ++ if (!is_kernel_text((unsigned long)ip) && ++ !is_bpf_text_address((unsigned long)ip)) ++ return -ENOTSUPP; ++ ++ ret = emit_jump_or_nops(old_addr, ip, old_insns, is_call); ++ if (ret) ++ return ret; ++ ++ if (memcmp(ip, old_insns, LOONGARCH_LONG_JUMP_NBYTES)) ++ return -EFAULT; ++ ++ ret = emit_jump_or_nops(new_addr, ip, new_insns, is_call); ++ if (ret) ++ return ret; ++ ++ mutex_lock(&text_mutex); ++ if (memcmp(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES)) ++ ret = larch_insn_text_copy(ip, new_insns, LOONGARCH_LONG_JUMP_NBYTES); ++ mutex_unlock(&text_mutex); ++ ++ return ret; ++} ++ ++int bpf_arch_text_invalidate(void *dst, size_t len) ++{ ++ int i; ++ int ret = 0; ++ u32 *inst; ++ ++ inst = kvmalloc(len, GFP_KERNEL); ++ if (!inst) ++ return -ENOMEM; ++ ++ for (i = 0; i < (len / sizeof(u32)); i++) ++ inst[i] = INSN_BREAK; ++ ++ mutex_lock(&text_mutex); ++ if (larch_insn_text_copy(dst, inst, len)) ++ ret = -EINVAL; ++ mutex_unlock(&text_mutex); ++ ++ kvfree(inst); ++ ++ return ret; ++} ++ + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) + { + bool tmp_blinded = false, extra_pass = false; diff --git a/target/linux/loongarch64/patches-6.12/004-v6.17-LoongArch-BPF-Add-basic-bpf-trampoline-support.patch b/target/linux/loongarch64/patches-6.12/004-v6.17-LoongArch-BPF-Add-basic-bpf-trampoline-support.patch new file mode 100644 index 0000000000..93bf9d986e --- /dev/null +++ b/target/linux/loongarch64/patches-6.12/004-v6.17-LoongArch-BPF-Add-basic-bpf-trampoline-support.patch @@ -0,0 +1,467 @@ +From f9b6b41f0cf31791541cea9644ddbedb46465801 Mon Sep 17 00:00:00 2001 +From: Chenghao Duan +Date: Tue, 5 Aug 2025 19:00:18 +0800 +Subject: [PATCH 04/12] LoongArch: BPF: Add basic bpf trampoline support + +BPF trampoline is the critical infrastructure of the BPF subsystem, +acting as a mediator between kernel functions and BPF programs. Numerous +important features, such as using BPF program for zero overhead kernel +introspection, rely on this key component. + +The related tests have passed, including the following technical points: +1. fentry +2. fmod_ret +3. 
fexit + +The following related testcases passed on LoongArch: +sudo ./test_progs -a fentry_test/fentry +sudo ./test_progs -a fexit_test/fexit +sudo ./test_progs -a fentry_fexit +sudo ./test_progs -a modify_return +sudo ./test_progs -a fexit_sleep +sudo ./test_progs -a test_overhead +sudo ./test_progs -a trampoline_count + +This issue was first reported by Geliang Tang in June 2024 while +debugging MPTCP BPF selftests on a LoongArch machine (see commit +eef0532e900c "selftests/bpf: Null checks for links in bpf_tcp_ca"). +Geliang, Huacai, and Tiezhu then worked together to drive the +implementation of this feature, encouraging broader collaboration among +Chinese kernel engineers. + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202507100034.wXofj6VX-lkp@intel.com/ +Reported-by: Geliang Tang +Tested-by: Tiezhu Yang +Tested-by: Vincent Li +Co-developed-by: George Guo +Signed-off-by: George Guo +Signed-off-by: Chenghao Duan +Signed-off-by: Huacai Chen +--- + arch/loongarch/net/bpf_jit.c | 377 +++++++++++++++++++++++++++++++++++ + arch/loongarch/net/bpf_jit.h | 6 + + 2 files changed, 383 insertions(+) + +--- a/arch/loongarch/net/bpf_jit.c ++++ b/arch/loongarch/net/bpf_jit.c +@@ -7,9 +7,15 @@ + #include + #include "bpf_jit.h" + ++#define LOONGARCH_MAX_REG_ARGS 8 ++ + #define LOONGARCH_LONG_JUMP_NINSNS 5 + #define LOONGARCH_LONG_JUMP_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) + ++#define LOONGARCH_FENTRY_NINSNS 2 ++#define LOONGARCH_FENTRY_NBYTES (LOONGARCH_FENTRY_NINSNS * 4) ++#define LOONGARCH_BPF_FENTRY_NBYTES (LOONGARCH_LONG_JUMP_NINSNS * 4) ++ + #define REG_TCC LOONGARCH_GPR_A6 + #define TCC_SAVED LOONGARCH_GPR_S5 + +@@ -1222,6 +1228,11 @@ static int emit_jump_or_nops(void *targe + return emit_jump_and_link(&ctx, is_call ? 
LOONGARCH_GPR_T0 : LOONGARCH_GPR_ZERO, (u64)target); + } + ++static int emit_call(struct jit_ctx *ctx, u64 addr) ++{ ++ return emit_jump_and_link(ctx, LOONGARCH_GPR_RA, addr); ++} ++ + void *bpf_arch_text_copy(void *dst, void *src, size_t len) + { + int ret; +@@ -1287,6 +1298,372 @@ int bpf_arch_text_invalidate(void *dst, + return ret; + } + ++static void store_args(struct jit_ctx *ctx, int nargs, int args_off) ++{ ++ int i; ++ ++ for (i = 0; i < nargs; i++) { ++ emit_insn(ctx, std, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); ++ args_off -= 8; ++ } ++} ++ ++static void restore_args(struct jit_ctx *ctx, int nargs, int args_off) ++{ ++ int i; ++ ++ for (i = 0; i < nargs; i++) { ++ emit_insn(ctx, ldd, LOONGARCH_GPR_A0 + i, LOONGARCH_GPR_FP, -args_off); ++ args_off -= 8; ++ } ++} ++ ++static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, ++ int args_off, int retval_off, int run_ctx_off, bool save_ret) ++{ ++ int ret; ++ u32 *branch; ++ struct bpf_prog *p = l->link.prog; ++ int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); ++ ++ if (l->cookie) { ++ move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false); ++ emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); ++ } else { ++ emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off); ++ } ++ ++ /* arg1: prog */ ++ move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); ++ /* arg2: &run_ctx */ ++ emit_insn(ctx, addid, LOONGARCH_GPR_A1, LOONGARCH_GPR_FP, -run_ctx_off); ++ ret = emit_call(ctx, (const u64)bpf_trampoline_enter(p)); ++ if (ret) ++ return ret; ++ ++ /* store prog start time */ ++ move_reg(ctx, LOONGARCH_GPR_S1, LOONGARCH_GPR_A0); ++ ++ /* ++ * if (__bpf_prog_enter(prog) == 0) ++ * goto skip_exec_of_prog; ++ */ ++ branch = (u32 *)ctx->image + ctx->idx; ++ /* nop reserved for conditional jump */ ++ emit_insn(ctx, nop); ++ ++ /* arg1: &args_off */ ++ emit_insn(ctx, addid, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -args_off); ++ if (!p->jited) ++ move_imm(ctx, LOONGARCH_GPR_A1, (const s64)p->insnsi, false); ++ ret = emit_call(ctx, (const u64)p->bpf_func); ++ if (ret) ++ return ret; ++ ++ if (save_ret) { ++ emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); ++ emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); ++ } ++ ++ /* update branch with beqz */ ++ if (ctx->image) { ++ int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branch; ++ *branch = larch_insn_gen_beq(LOONGARCH_GPR_A0, LOONGARCH_GPR_ZERO, offset); ++ } ++ ++ /* arg1: prog */ ++ move_imm(ctx, LOONGARCH_GPR_A0, (const s64)p, false); ++ /* arg2: prog start time */ ++ move_reg(ctx, LOONGARCH_GPR_A1, LOONGARCH_GPR_S1); ++ /* arg3: &run_ctx */ ++ emit_insn(ctx, addid, LOONGARCH_GPR_A2, LOONGARCH_GPR_FP, -run_ctx_off); ++ ret = emit_call(ctx, (const u64)bpf_trampoline_exit(p)); ++ ++ return ret; ++} ++ ++static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, ++ int args_off, int retval_off, int run_ctx_off, u32 **branches) ++{ ++ int i; ++ ++ emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off); ++ for (i = 0; i < tl->nr_links; i++) { ++ invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true); ++ emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off); ++ branches[i] = (u32 *)ctx->image + ctx->idx; ++ emit_insn(ctx, nop); ++ } ++} ++ ++void *arch_alloc_bpf_trampoline(unsigned int size) ++{ ++ return bpf_prog_pack_alloc(size, jit_fill_hole); ++} ++ ++void 
arch_free_bpf_trampoline(void *image, unsigned int size) ++{ ++ bpf_prog_pack_free(image, size); ++} ++ ++static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, ++ const struct btf_func_model *m, struct bpf_tramp_links *tlinks, ++ void *func_addr, u32 flags) ++{ ++ int i, ret, save_ret; ++ int stack_size = 0, nargs = 0; ++ int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off; ++ void *orig_call = func_addr; ++ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; ++ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; ++ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; ++ u32 **branches = NULL; ++ ++ if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) ++ return -ENOTSUPP; ++ ++ /* ++ * FP + 8 [ RA to parent func ] return address to parent ++ * function ++ * FP + 0 [ FP of parent func ] frame pointer of parent ++ * function ++ * FP - 8 [ T0 to traced func ] return address of traced ++ * function ++ * FP - 16 [ FP of traced func ] frame pointer of traced ++ * function ++ * ++ * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or ++ * BPF_TRAMP_F_RET_FENTRY_RET ++ * [ argN ] ++ * [ ... ] ++ * FP - args_off [ arg1 ] ++ * ++ * FP - nargs_off [ regs count ] ++ * ++ * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG ++ * ++ * FP - run_ctx_off [ bpf_tramp_run_ctx ] ++ * ++ * FP - sreg_off [ callee saved reg ] ++ * ++ */ ++ ++ if (m->nr_args > LOONGARCH_MAX_REG_ARGS) ++ return -ENOTSUPP; ++ ++ if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY)) ++ return -ENOTSUPP; ++ ++ stack_size = 0; ++ ++ /* Room of trampoline frame to store return address and frame pointer */ ++ stack_size += 16; ++ ++ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); ++ if (save_ret) { ++ /* Save BPF R0 and A0 */ ++ stack_size += 16; ++ retval_off = stack_size; ++ } ++ ++ /* Room of trampoline frame to store args */ ++ nargs = m->nr_args; ++ stack_size += nargs * 8; ++ args_off = stack_size; ++ ++ /* Room of trampoline frame to store args number */ ++ stack_size += 8; ++ nargs_off = stack_size; ++ ++ /* Room of trampoline frame to store ip address */ ++ if (flags & BPF_TRAMP_F_IP_ARG) { ++ stack_size += 8; ++ ip_off = stack_size; ++ } ++ ++ /* Room of trampoline frame to store struct bpf_tramp_run_ctx */ ++ stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); ++ run_ctx_off = stack_size; ++ ++ stack_size += 8; ++ sreg_off = stack_size; ++ ++ stack_size = round_up(stack_size, 16); ++ ++ /* For the trampoline called from function entry */ ++ /* RA and FP for parent function */ ++ emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -16); ++ emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); ++ emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); ++ emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 16); ++ ++ /* RA and FP for traced function */ ++ emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_size); ++ emit_insn(ctx, std, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); ++ emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); ++ emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size); ++ ++ /* callee saved register S1 to pass start time */ ++ emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); ++ ++ /* store ip address of the traced function */ ++ if (flags & BPF_TRAMP_F_IP_ARG) { ++ move_imm(ctx, LOONGARCH_GPR_T1, (const s64)func_addr, false); ++ emit_insn(ctx, std, LOONGARCH_GPR_T1, 
LOONGARCH_GPR_FP, -ip_off); ++ } ++ ++ /* store nargs number */ ++ move_imm(ctx, LOONGARCH_GPR_T1, nargs, false); ++ emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -nargs_off); ++ ++ store_args(ctx, nargs, args_off); ++ ++ /* To traced function */ ++ /* Ftrace jump skips 2 NOP instructions */ ++ if (is_kernel_text((unsigned long)orig_call)) ++ orig_call += LOONGARCH_FENTRY_NBYTES; ++ /* Direct jump skips 5 NOP instructions */ ++ else if (is_bpf_text_address((unsigned long)orig_call)) ++ orig_call += LOONGARCH_BPF_FENTRY_NBYTES; ++ ++ if (flags & BPF_TRAMP_F_CALL_ORIG) { ++ move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false); ++ ret = emit_call(ctx, (const u64)__bpf_tramp_enter); ++ if (ret) ++ return ret; ++ } ++ ++ for (i = 0; i < fentry->nr_links; i++) { ++ ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off, ++ run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); ++ if (ret) ++ return ret; ++ } ++ if (fmod_ret->nr_links) { ++ branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL); ++ if (!branches) ++ return -ENOMEM; ++ ++ invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, run_ctx_off, branches); ++ } ++ ++ if (flags & BPF_TRAMP_F_CALL_ORIG) { ++ restore_args(ctx, m->nr_args, args_off); ++ ret = emit_call(ctx, (const u64)orig_call); ++ if (ret) ++ goto out; ++ emit_insn(ctx, std, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); ++ emit_insn(ctx, std, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); ++ im->ip_after_call = ctx->ro_image + ctx->idx; ++ /* Reserve space for the move_imm + jirl instruction */ ++ for (i = 0; i < LOONGARCH_LONG_JUMP_NINSNS; i++) ++ emit_insn(ctx, nop); ++ } ++ ++ for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) { ++ int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i]; ++ *branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset); ++ } ++ ++ for (i = 0; i < fexit->nr_links; i++) { ++ ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false); ++ if (ret) ++ goto out; ++ } ++ ++ if (flags & BPF_TRAMP_F_CALL_ORIG) { ++ im->ip_epilogue = ctx->ro_image + ctx->idx; ++ move_imm(ctx, LOONGARCH_GPR_A0, (const s64)im, false); ++ ret = emit_call(ctx, (const u64)__bpf_tramp_exit); ++ if (ret) ++ goto out; ++ } ++ ++ if (flags & BPF_TRAMP_F_RESTORE_REGS) ++ restore_args(ctx, m->nr_args, args_off); ++ ++ if (save_ret) { ++ emit_insn(ctx, ldd, LOONGARCH_GPR_A0, LOONGARCH_GPR_FP, -retval_off); ++ emit_insn(ctx, ldd, regmap[BPF_REG_0], LOONGARCH_GPR_FP, -(retval_off - 8)); ++ } ++ ++ emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_FP, -sreg_off); ++ ++ /* trampoline called from function entry */ ++ emit_insn(ctx, ldd, LOONGARCH_GPR_T0, LOONGARCH_GPR_SP, stack_size - 8); ++ emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_size - 16); ++ emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_size); ++ ++ emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, 8); ++ emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, 0); ++ emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, 16); ++ ++ if (flags & BPF_TRAMP_F_SKIP_FRAME) ++ /* return to parent function */ ++ emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); ++ else ++ /* return to traced function */ ++ emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T0, 0); ++ ++ ret = ctx->idx; ++out: ++ kfree(branches); ++ ++ return ret; ++} ++ ++int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, ++ void *ro_image_end, const struct btf_func_model *m, 
++ u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) ++{ ++ int ret, size; ++ void *image, *tmp; ++ struct jit_ctx ctx; ++ ++ size = ro_image_end - ro_image; ++ image = kvmalloc(size, GFP_KERNEL); ++ if (!image) ++ return -ENOMEM; ++ ++ ctx.image = (union loongarch_instruction *)image; ++ ctx.ro_image = (union loongarch_instruction *)ro_image; ++ ctx.idx = 0; ++ ++ jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); ++ ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags); ++ if (ret > 0 && validate_code(&ctx) < 0) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ tmp = bpf_arch_text_copy(ro_image, image, size); ++ if (IS_ERR(tmp)) { ++ ret = PTR_ERR(tmp); ++ goto out; ++ } ++ ++ bpf_flush_icache(ro_image, ro_image_end); ++out: ++ kvfree(image); ++ return ret < 0 ? ret : size; ++} ++ ++int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, ++ struct bpf_tramp_links *tlinks, void *func_addr) ++{ ++ int ret; ++ struct jit_ctx ctx; ++ struct bpf_tramp_image im; ++ ++ ctx.image = NULL; ++ ctx.idx = 0; ++ ++ ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags); ++ ++ /* Page align */ ++ return ret < 0 ? ret : round_up(ret * LOONGARCH_INSN_SIZE, PAGE_SIZE); ++} ++ + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) + { + bool tmp_blinded = false, extra_pass = false; +--- a/arch/loongarch/net/bpf_jit.h ++++ b/arch/loongarch/net/bpf_jit.h +@@ -18,6 +18,7 @@ struct jit_ctx { + u32 *offset; + int num_exentries; + union loongarch_instruction *image; ++ union loongarch_instruction *ro_image; + u32 stack_size; + }; + +@@ -308,3 +309,8 @@ static inline int emit_tailcall_jmp(stru + + return -EINVAL; + } ++ ++static inline void bpf_flush_icache(void *start, void *end) ++{ ++ flush_icache_range((unsigned long)start, (unsigned long)end); ++} -- 2.30.2
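
For reference, the userspace side that this backport unblocks can be exercised with a minimal libbpf fentry loader like the sketch below (illustrative only, not part of the patches above). It assumes a libbpf 1.0+ toolchain and a hypothetical fentry_probe.bpf.o object containing a SEC("fentry/do_unlinkat") program named handle_unlinkat; libxdp's multi-program dispatcher depends on the same trampoline infrastructure through its freplace attachments, which is why xdp-loader needs this feature. On a 6.12 LoongArch64 kernel without these patches the attach step is expected to fail; with them applied it succeeds. Compile with something like: cc fentry_loader.c -lbpf

	#include <stdio.h>
	#include <bpf/libbpf.h>

	int main(void)
	{
		int ret = 1;
		struct bpf_object *obj;
		struct bpf_program *prog;
		struct bpf_link *link;

		/* Hypothetical object with a SEC("fentry/do_unlinkat") program. */
		obj = bpf_object__open_file("fentry_probe.bpf.o", NULL);
		if (!obj) {
			fprintf(stderr, "failed to open BPF object\n");
			return 1;
		}

		/* Verifier and JIT run here; the LoongArch JIT emits the program code. */
		if (bpf_object__load(obj)) {
			fprintf(stderr, "failed to load BPF object\n");
			goto out;
		}

		prog = bpf_object__find_program_by_name(obj, "handle_unlinkat");
		if (!prog)
			goto out;

		/*
		 * Attaching an fentry program makes the kernel build a BPF
		 * trampoline for the target function; this is the step that
		 * fails on architectures without trampoline support.
		 */
		link = bpf_program__attach(prog);
		if (!link) {
			fprintf(stderr, "attach failed: no trampoline support?\n");
			goto out;
		}

		printf("fentry attached, press Enter to detach\n");
		getchar();

		bpf_link__destroy(link);
		ret = 0;
	out:
		bpf_object__close(obj);
		return ret;
	}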